* negbin_fit2.sas;
title1 'Fitting the negative binomial to frequency data';
* Read the frequency table.  Each record holds a count value y and obsfreq,
  the number of times that value was observed;
data negbin;
	input y obsfreq;
	* Shift y slightly left (yexp) and right (yobs) so the expected and
	  observed needles sit side by side in the plot instead of overlapping;
	yexp = y - 0.1;
	yobs = y + 0.1;
	datalines;
 0  24
 1  16
 2  16
 3  18
 4  15
 5   9
 6   6
 7   5
 8   3
 9   4
10   3
11   0
12   1
;
run;
* List the input data together with the offset plotting positions;
proc print data=negbin;
	var y obsfreq yexp yobs;
run;
* Summarise y with every value counted obsfreq times.  The mean, the total
  count and the variance are saved in data set stats as ybar, n and var;
proc univariate data=negbin;
	freq obsfreq;
	var y;
	histogram y / vscale=count;
	output out=stats
		mean=ybar
		n=n
		var=var;
run;
* Print the summary data set stats (ybar, n and var) that was written by
  the output statement of proc univariate;
proc print data=stats;
run;
* Fit an intercept-only negative binomial model to y, counting each value
  obsfreq times.  The parameter estimates table is captured in data set
  params so that m and k can be derived from it;
proc genmod data=negbin;
	freq obsfreq;
	ods output ParameterEstimates=params;
	model y = / dist=negbin;
run;
* Pick out value of m from genmod output.
  m is the fitted mean, obtained by back-transforming the intercept with
  exp() (the intercept is presumably on the log scale, the default link for
  dist=negbin - confirm).  The row is selected by parameter name rather than
  by its position in the table, so the step keeps working if the layout of
  the estimates table changes;
data m;
	set params;
	where Parameter = 'Intercept';
	m = exp(Estimate);
	keep m;
run;
* Pick out value of k from genmod output.
  k is computed as the reciprocal of the dispersion estimate reported by
  genmod.  The row is selected by parameter name rather than by its position
  in the table, so a regression coefficient can never be mistaken for the
  dispersion if predictors are added to the model;
data k;
	set params;
	where Parameter = 'Dispersion';
	k = 1/Estimate;
	keep k;
run;
* Join the single-row data sets m and k side by side so that one row holds
  both parameters.  This overwrites the genmod estimates table of the same
  name;
data params;
	merge m(keep=m) k(keep=k);
run;
* Calculate expected frequencies using m and k.
  The one-row data sets stats and params are read on the first iteration
  only.  Variables read by set are retained, so n, m and k remain available
  for every row of negbin.
  The negative binomial probability
      gamma(k+y)/(gamma(y+1)*gamma(k)) * (m/(k+m))**y * (1+m/k)**(-k)
  is evaluated on the log scale with lgamma.  The gamma function overflows
  once its argument exceeds roughly 171, which happens when the estimated
  dispersion is close to zero and k is therefore large.  The log form gives
  the same values without that limit;
data nbfit;
	if _n_ = 1 then set stats;
	if _n_ = 1 then set params;
	set negbin;
	nbprob = exp(lgamma(k + y) - lgamma(y + 1) - lgamma(k)
	             + y*log(m/(k + m)) - k*log(1 + m/k));
	* Expected frequency is the probability scaled by the total count n;
	expfreq = n*nbprob;
run;
* Print observed and expected frequencies.  Each row of nbfit lists y and
  obsfreq next to the fitted probability nbprob and the expected frequency
  expfreq, together with the retained summary statistics and parameters;
proc print data=nbfit;
run;
* Needle plot comparing expected frequencies (red circles, drawn at yexp)
  with observed frequencies (blue squares, drawn at yobs).  The symbol, axis
  and legend definitions are global statements and are set up first;
symbol1 i=needle v=circle c=red width=3 height=2;
symbol2 i=needle v=square c=blue width=3 height=2;
axis1 label=(height=2) value=(height=2) width=3 major=(width=2) minor=none;
legend1 label=(height=2) value=(height=2);
proc gplot data=nbfit;
	plot expfreq*yexp=1
	     obsfreq*yobs=2
	     / overlay
	       legend=legend1
	       vref=0 wvref=3
	       vaxis=axis1
	       haxis=axis1;
run;
quit;
